/*******************************************************************************
* NAME           : T-test among immigrants and home-country stayers            *
* AUTHOR         : Harrison Chang                                              *              
* LAST MODIFIED  : 2023/06/12                                                  *  
*                                                                              *
*                                                                              *
* PURPOSE        : This dofile compiles datasets from IPUMS and conducts       *
*                  hypothesis testing among people who immigrate and those     *
*                  who don't. We further restrict age to 25-49 and report      *
*                  education level in bins.                                    *
*                                                                              *
*******************************************************************************/

* Move to the project root. Both candidate locations are attempted under
* -capture-, so a path that does not exist on this machine is skipped silently.
capture cd "/Users/harrison/Dropbox/Canadian_IGM"
capture cd "/Users/harrisonc/Dropbox/Canadian_IGM"

* Let long output scroll without pausing at --more-- prompts.
set more off



*****************************************************
*                 Non-Refugee Country               *
*****************************************************

***Philippines: IPUMS year = 1995
* Load the extract, keep ages 25-49, and run the shared conversion do-file,
* which is expected to create `schooling` (it is summarized right after).
use "data/philippines1995.dta", clear
keep perwt age edattaind
tab perwt // weight = 10, i.e. a 10% sample
keep if age<=49 & age>=25
* Full variable name: the previous `sort edattain` only worked through
* variable abbreviation and would fail under `set varabbrev off`.
sort edattaind
tab edattaind
do "dofile/schooling_conversion.do"

sum schooling


***UK: IPUMS year = 2001 (1991 missing too much)
* Build years of schooling for ages 25-49 from the UK-specific variable educuk.
use "data/uk2001.dta", clear
keep age perwt educuk
tabulate perwt                       // weight = 33.33, i.e. a 1/33 sample
drop if !inrange(age, 25, 49)
tabulate educuk

* 0/1 indicators, one per group of educuk codes.
generate count_niu                 = (educuk == 99)
generate count_no_schooling        = inlist(educuk, 20, 30)
generate count_primary_complete    = inlist(educuk, 21, 31)
generate count_lower_secondary     = inlist(educuk, 22, 32)
generate count_secondary_complete  = inlist(educuk, 23, 33)
generate count_university_complete = inlist(educuk, 24, 34)
generate count_unknown             = (educuk == 29)

* Years of schooling assigned below:
*   no qualification (20/30) -> 0
*   level 1          (21/31) -> 6   (primary)
*   level 2          (22/32) -> 9   (lower secondary, or middle school)
*   level 3          (23/33) -> 12  (general secondary, or high school)
*   level 4/5        (24/34) -> 16  (college and higher degree)
* NOTE(review): earlier notes in this file gave level 1 = 3 years and
* level 3 = 13 years, which does not match the 6 and 12 assigned here -
* confirm which mapping is intended.

* Discard records coded 99 (flagged niu) or 29 (flagged unknown).
keep if count_niu == 0 & count_unknown == 0

generate schooling = 0    if count_no_schooling
replace  schooling = 6    if count_primary_complete
replace  schooling = 9    if count_lower_secondary
replace  schooling = 12   if count_secondary_complete
replace  schooling = 16   if count_university_complete

summarize schooling




***China: IPUMS year = 2000
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/china2000.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 100, i.e. a 1% sample
drop if !inrange(age, 25, 49)
tabulate edattaind
sort edattaind
do "dofile/schooling_conversion.do"

summarize schooling



***Poland: IPUMS year = 1988
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/poland1988.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling




***Jamaica: IPUMS year = 1991
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/jamaica1991.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 211 is assigned 5 years.
replace schooling = 5 if edattaind == 211

summarize schooling




***Pakistan: IPUMS year = 1998
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/pakistan1998.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling



***Vietnam: IPUMS year = 1989
* Person weights vary in this extract, so the section works in two steps:
*   1. compute weighted head counts per edattaind code among ages 25-49;
*   2. build a synthetic dataset with one row per weighted person and assign
*      years of schooling by row position.
* The row cut-offs in step 2 are hard-coded.
* NOTE(review): presumably they are the rounded cumulative count_* totals from
* step 1 - confirm, and re-derive them if the extract changes. How code 999
* (count_unknown) is treated in the cut-offs is not visible here.
use "data/vietnam1989.dta", clear
tab perwt // weight = 1.98 to 92.18
egen count_ipums = total(perwt) // population = 63M (computed before the age filter)
keep if age<=49 & age>=25
tab edattaind
* Each total is stored only on the rows matching its condition (missing elsewhere).
egen count_no_schooling        = total(perwt) if edattaind == 110
egen count_primary_less        = total(perwt) if edattaind == 120
egen count_primary_complete    = total(perwt) if edattaind == 212
egen count_lower_secondary     = total(perwt) if edattaind == 221
egen count_secondary_complete  = total(perwt) if edattaind == 311
egen count_university_complete = total(perwt) if edattaind == 400
egen count_unknown             = total(perwt) if edattaind == 999

clear

set obs 17523865
* seq must be stored as long: the default float type holds integers exactly
* only up to 16,777,216, so with this many rows a float row index is rounded
* and the bin boundaries below could be off.
gen long seq = _n
gen schooling        = 0    if seq<=1287335                       // no school
replace schooling    = 3    if seq>1287335 & seq<=6848052         // primary school less
replace schooling    = 6    if seq>6848052 & seq<=13172805        // primary school completed
replace schooling    = 9    if seq>13172805 & seq<=14909010       // lower secondary completed
replace schooling    = 12   if seq>14909010 & seq<=16959155       // general secondary completed
replace schooling    = 16   if seq>16959155                       // university completed

sum schooling




***USA: IPUMS year = 1980
* Person weights vary in this extract, so the section works in two steps:
*   1. compute weighted head counts per edattaind code among ages 25-49;
*   2. build a synthetic dataset with one row per weighted person and assign
*      years of schooling by row position.
* The row cut-offs in step 2 are hard-coded.
* NOTE(review): presumably they are the rounded cumulative count_* totals from
* step 1 - confirm, and re-derive them if the extract changes.
use "data/us1980.dta", clear
tab perwt // weight = 20 or 21
egen count_ipums = total(perwt) // population = 227M (computed before the age filter)
keep if age<=49 & age>=25
tab edattaind

* Each total is stored only on the rows matching its condition (missing elsewhere).
egen count_no_schooling        = total(perwt) if edattaind == 110
egen count_primary_less        = total(perwt) if edattaind == 120
egen count_primary_complete    = total(perwt) if edattaind == 212
egen count_lower_secondary     = total(perwt) if edattaind == 221
egen count_secondary_complete  = total(perwt) if edattaind == 311
egen count_university_less     = total(perwt) if edattaind == 312
egen count_university_complete = total(perwt) if edattaind == 400

clear

set obs 73785394
* seq must be stored as long: the default float type holds integers exactly
* only up to 16,777,216, so with this many rows a float row index is rounded
* and the bin boundaries below could be off.
gen long seq = _n
gen schooling        = 0    if seq<=422274                         // no school
replace schooling    = 3    if seq>422274 & seq<=1645528           // primary school less
replace schooling    = 6    if seq>1645528 & seq<=5957447          // primary school completed
replace schooling    = 9    if seq>5957447 & seq<=15191256         // lower secondary completed
replace schooling    = 12   if seq>15191256 & seq<=43914404        // general secondary completed
replace schooling    = 14   if seq>43914404 & seq<=58358893        // some college
replace schooling    = 16   if seq>58358893                        // university completed

sum schooling




***Romania: IPUMS year = 1992 
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/romania1992.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 130 is assigned 4 years.
replace schooling = 4 if edattaind == 130

summarize schooling




***Portugal: IPUMS year = 1991 
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for two country-specific codes.
use "data/portugal1991.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 20, i.e. a 5% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific overrides: code 130 -> 4 years, code 100 -> 3 years.
replace schooling = 4 if edattaind == 130
replace schooling = 3 if edattaind == 100

summarize schooling



***Haiti: IPUMS year = 2003
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/haiti2003.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling



***Trinidad and Tobago: IPUMS year = 1990
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/trinidad1990.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling


***Republic of South Africa: IPUMS year = 1996
* Person weights vary in this extract, so the section works in two steps:
*   1. compute weighted head counts per edattaind code among ages 25-49;
*   2. build a synthetic dataset with one row per weighted person and assign
*      years of schooling by row position.
* The row cut-offs in step 2 are hard-coded.
* NOTE(review): presumably they are the rounded cumulative count_* totals from
* step 1 - confirm, and re-derive them if the extract changes. How code 999
* (count_unknown) is treated in the cut-offs is not visible here.
use "data/southafrica1996.dta", clear
keep perwt age edattaind
tab perwt // weight = 10.16 to 21.51
egen count_ipums = total(perwt) // population = 41M (computed before the age filter)
keep if age<=49 & age>=25
tab edattaind
* Each total is stored only on the rows matching its condition (missing elsewhere).
egen count_no_schooling        = total(perwt) if edattaind == 110
egen count_primary_less        = total(perwt) if edattaind == 120
egen count_primary_complete    = total(perwt) if edattaind == 212
egen count_lower_secondary     = total(perwt) if edattaind == 221
egen count_secondary_complete  = total(perwt) if edattaind == 311
egen count_university_complete = total(perwt) if edattaind == 400
egen count_unknown             = total(perwt) if edattaind == 999

clear

set obs 12319359
* Store the row index as long. The default float type is exact only up to
* 16,777,216 rows; this N is below that, but long keeps the cut-offs exact
* if the counts are ever revised upwards.
gen long seq = _n
gen schooling        = 0    if seq<=1871670                        // no school
replace schooling    = 3    if seq>1871670 & seq<=3353674          // primary school less
replace schooling    = 6    if seq>3353674 & seq<=6315775          // primary school completed
replace schooling    = 9    if seq>6315775 & seq<=9380340          // lower secondary completed
replace schooling    = 12   if seq>9380340 & seq<=11989907         // general secondary completed
replace schooling    = 16   if seq>11989907                        // university completed

sum schooling



***Russia Federation: IPUMS year = 2002
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/russia2002.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 20, i.e. a 5% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 130 is assigned 4 years.
replace schooling = 4 if edattaind == 130

summarize schooling


***Ukraine: IPUMS year = 2001
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/ukraine2001.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 130 is assigned 4 years.
replace schooling = 4 if edattaind == 130

summarize schooling


***France: IPUMS year = 1990
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/france1990.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 24, i.e. a 1/24 sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 100 is assigned 3 years.
replace schooling = 3 if edattaind == 100

summarize schooling




***(West) Germany: IPUMS year = 1987
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/westgermany1987.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 20, i.e. a 5% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling









*****************************************************
*                    Refugee Country                *
*****************************************************

***Vietnam: IPUMS year = 1989
***DITTO

***Poland: IPUMS year = 1988
***DITTO

***El Salvador: IPUMS year = 1992
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/salvador1992.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling



***Iran: IPUMS year = 2006
* Person weights vary in this extract, so the section works in two steps:
*   1. compute weighted head counts per edattaind code among ages 25-49;
*   2. build a synthetic dataset with one row per weighted person and assign
*      years of schooling by row position.
* The row cut-offs in step 2 are hard-coded.
* NOTE(review): presumably they are the rounded cumulative count_* totals from
* step 1 - confirm, and re-derive them if the extract changes. Codes 321 and
* 999 get totals below but no schooling value of their own; how they were
* folded into the cut-offs is not visible here.

* Every other extract is read from data/; try that location first and fall
* back to the original bare path so an existing layout keeps working.
capture use "data/iran2006.dta", clear
if _rc use "iran2006.dta", clear
sum perwt // weight = 31.82 to 1326.1 
egen count_ipums = total(perwt) // population = 61.5M, real population is 71M
keep if age<=49 & age>=25
tab edattaind

* Each total is stored only on the rows matching its condition (missing elsewhere).
egen count_no_schooling        = total(perwt) if edattaind == 110
egen count_primary_less        = total(perwt) if edattaind == 120
egen count_primary_complete    = total(perwt) if edattaind == 211
egen count_lower_secondary     = total(perwt) if edattaind == 221
egen count_secondary_complete  = total(perwt) if edattaind == 311
egen count_some_college        = total(perwt) if edattaind == 312
egen count_secondary_technical = total(perwt) if edattaind == 321
egen count_university_complete = total(perwt) if edattaind == 400
egen count_unknown             = total(perwt) if edattaind == 999


clear

set obs 21525740
* seq must be stored as long: the default float type holds integers exactly
* only up to 16,777,216, so with this many rows a float row index is rounded
* and the bin boundaries below could be off.
gen long seq = _n
gen schooling        = 0    if seq<=4357638                        // no school
replace schooling    = 3    if seq>4357638 & seq<=5586379          // primary school less
replace schooling    = 5    if seq>5586379 & seq<=10578299         // primary school completed
replace schooling    = 9    if seq>10578299 & seq<=14436045        // lower secondary completed
replace schooling    = 12   if seq>14436045 & seq<=18736906        // general secondary completed
replace schooling    = 14   if seq>18736906 & seq<=19034978        // some college
replace schooling    = 16   if seq>19034978                        // university completed

sum schooling


***Thailand: IPUMS year = 1990
* Person weights vary in this extract, so the section works in two steps:
*   1. compute weighted head counts per edattaind code among ages 25-49;
*   2. build a synthetic dataset with one row per weighted person and assign
*      years of schooling by row position.
* The row cut-offs in step 2 are hard-coded.
* NOTE(review): presumably they are the rounded cumulative count_* totals from
* step 1 - confirm, and re-derive them if the extract changes. Codes 222, 321,
* 322 and 999 get totals below but no schooling value of their own; how they
* were folded into the cut-offs is not visible here.

* Every other extract is read from data/; try that location first and fall
* back to the original bare path so an existing layout keeps working.
capture use "data/thailand1990.dta", clear
if _rc use "thailand1990.dta", clear
sum perwt // weight = 63 to 143 
egen count_ipums = total(perwt) 
keep if age<=49 & age>=25
tab edattaind

* Each total is stored only on the rows matching its condition (missing elsewhere).
egen count_no_schooling        = total(perwt) if edattaind == 110
egen count_primary_less        = total(perwt) if edattaind == 120
egen count_primary_complete    = total(perwt) if edattaind == 212
egen count_lower_secondary     = total(perwt) if edattaind == 221
egen count_lower_second_tech   = total(perwt) if edattaind == 222
egen count_secondary_complete  = total(perwt) if edattaind == 311
egen count_some_college        = total(perwt) if edattaind == 312
egen count_secondary_technical = total(perwt) if edattaind == 321
egen count_post_second_tech    = total(perwt) if edattaind == 322
egen count_university_complete = total(perwt) if edattaind == 400
egen count_unknown             = total(perwt) if edattaind == 999


clear

set obs 19118255
* seq must be stored as long: the default float type holds integers exactly
* only up to 16,777,216, so with this many rows a float row index is rounded
* and the bin boundaries below could be off.
gen long seq = _n
gen schooling        = 0    if seq<=1014284                        // no school
replace schooling    = 3    if seq>1014284 & seq<=13482093         // primary school less
replace schooling    = 6    if seq>13482093 & seq<=15444840        // primary school completed
replace schooling    = 9    if seq>15444840 & seq<=16340739        // lower secondary completed
replace schooling    = 12   if seq>16340739 & seq<=17419349        // general secondary completed
replace schooling    = 14   if seq>17419349 & seq<=18029711        // some college
replace schooling    = 16   if seq>18029711                        // university completed

sum schooling



***Laos: IPUMS year = 2005
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/laos2005.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 211 is assigned 5 years.
replace schooling = 5 if edattaind == 211

summarize schooling


***Cambodia: IPUMS year = 1998
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/cambodia1998.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling


***Iraq: IPUMS year = 1997
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/iraq1997.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 100 is assigned 3 years.
replace schooling = 3 if edattaind == 100

summarize schooling

***Guatemala: IPUMS year = 1994
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/guatemala1994.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling


***Nicaragua: IPUMS year = 1995
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/nicaragua1995.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling



***Ethiopia: IPUMS year = 1994
* Person weights vary in this extract, so the section works in two steps:
*   1. compute weighted head counts per edattaind code among ages 25-49;
*   2. build a synthetic dataset with one row per weighted person and assign
*      years of schooling by row position.
* The row cut-offs in step 2 are hard-coded.
* NOTE(review): presumably they are the rounded cumulative count_* totals from
* step 1 - confirm, and re-derive them if the extract changes. Codes 0, 322
* and 999 get totals below but no schooling value of their own; how they were
* folded into the cut-offs is not visible here.

* Every other extract is read from data/; try that location first and fall
* back to the original bare path so an existing layout keeps working.
capture use "data/ethiopia1994.dta", clear
if _rc use "ethiopia1994.dta", clear
tab perwt // weight = 0 to 150
drop if perwt==0
egen count_ipums = total(perwt) 
keep if age<=49 & age>=25
tab edattaind

* Each total is stored only on the rows matching its condition (missing elsewhere).
egen count_no_niu              = total(perwt) if edattaind == 0
egen count_no_schooling        = total(perwt) if edattaind == 110
egen count_primary_less        = total(perwt) if edattaind == 120
egen count_primary_complete    = total(perwt) if edattaind == 212
egen count_lower_secondary     = total(perwt) if edattaind == 221
egen count_secondary_complete  = total(perwt) if edattaind == 311
egen count_some_college        = total(perwt) if edattaind == 312
egen count_post_second_tech    = total(perwt) if edattaind == 322
egen count_university_complete = total(perwt) if edattaind == 400
egen count_unknown             = total(perwt) if edattaind == 999

clear

set obs 12965925
* Store the row index as long. The default float type is exact only up to
* 16,777,216 rows; this N is below that, but long keeps the cut-offs exact
* if the counts are ever revised upwards.
gen long seq = _n
gen schooling        = 0    if seq<=10019718                       // no school
replace schooling    = 3    if seq>10019718 & seq<=11632559        // primary school less
replace schooling    = 6    if seq>11632559 & seq<=12049862        // primary school completed
replace schooling    = 9    if seq>12049862 & seq<=12494241        // lower secondary completed
replace schooling    = 12   if seq>12494241 & seq<=12806321        // general secondary completed
replace schooling    = 14   if seq>12806321 & seq<=12937024        // some college
replace schooling    = 16   if seq>12937024                        // university completed

sum schooling


***Ghana: IPUMS year = 2000
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/ghana2000.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling



***Hungary: IPUMS year = 1990
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/hungary1990.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 20, i.e. a 5% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling




***Chile: IPUMS year = 1992
* Ages 25-49 only; `schooling` is expected to come from the shared
* conversion do-file (it is summarized immediately afterwards).
use "data/chile1992.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
keep if inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

summarize schooling




***Bangladesh: IPUMS year = 2001
* Ages 25-49 only; the shared conversion do-file is expected to create
* `schooling`, which is then adjusted for one country-specific code.
use "data/bangladesh2001.dta", clear
keep age perwt edattaind
tabulate perwt                       // weight = 10, i.e. a 10% sample
drop if !inrange(age, 25, 49)
sort edattaind
tabulate edattaind
do "dofile/schooling_conversion.do"

* Country-specific override: edattaind 211 is assigned 5 years.
replace schooling = 5 if edattaind == 211

summarize schooling



***U.S.: IPUMS year = 1990
* Person weights vary in this extract, so the section works in two steps:
*   1. compute weighted head counts per edattaind code among ages 25-49;
*   2. build a synthetic dataset with one row per weighted person and assign
*      years of schooling by row position.
* The row cut-offs in step 2 are hard-coded.
* NOTE(review): presumably they are the rounded cumulative count_* totals from
* step 1 - confirm, and re-derive them if the extract changes. Code 322 gets a
* total below but no schooling value of its own; how it was folded into the
* cut-offs is not visible here.

* Every other extract is read from data/; try that location first and fall
* back to the original bare path so an existing layout keeps working.
capture use "data/us1990.dta", clear
if _rc use "us1990.dta", clear
tab perwt // weight = 0 to 345
drop if perwt==0
egen count_ipums = total(perwt) 
keep if age<=49 & age>=25
tab edattaind

* Each total is stored only on the rows matching its condition (missing elsewhere).
egen count_no_schooling        = total(perwt) if edattaind == 110
egen count_primary_less        = total(perwt) if edattaind == 120
egen count_primary_complete    = total(perwt) if edattaind == 212
egen count_lower_secondary     = total(perwt) if edattaind == 221
egen count_secondary_complete  = total(perwt) if edattaind == 311
egen count_some_college        = total(perwt) if edattaind == 312
egen count_post_second_tech    = total(perwt) if edattaind == 322
egen count_university_complete = total(perwt) if edattaind == 400

clear

set obs 94876454
* seq must be stored as long: the default float type holds integers exactly
* only up to 16,777,216, so with this many rows a float row index is rounded
* and the bin boundaries below could be off.
gen long seq = _n
gen schooling        = 0    if seq<=869923                         // no school
replace schooling    = 3    if seq>869923 & seq<=1466873           // primary school less
replace schooling    = 6    if seq>1466873 & seq<=4389453          // primary school completed
replace schooling    = 9    if seq>4389453 & seq<=11971842         // lower secondary completed
replace schooling    = 12   if seq>11971842 & seq<=43194278        // general secondary completed
replace schooling    = 14   if seq>43194278 & seq<=71593252        // some college
replace schooling    = 16   if seq>71593252                        // university completed

sum schooling






